commit 672628fb19da7b1b93ab08eaa288fdb3ef5897d9
Author: Yusuke Suzuki <ysuzuki@apple.com>
Date:   Wed Feb 22 13:41:16 2023 -0800

    [JSC] Fix SIMD in new BBQ
    https://bugs.webkit.org/show_bug.cgi?id=252736
    rdar://105769837
    
    Reviewed by Mark Lam.
    
    Fix SIMD ops in new BBQ.
    
    1. Some ops use possibly-overwritten FPR registers. Fixed with scratch registers.
    2. Clean up AirLowerMacros's generation.
    3. SIMD splat can take Int constants. Thus we should check and load constants.
    
    * Source/JavaScriptCore/b3/air/AirLowerMacros.cpp:
    (JSC::B3::Air::lowerMacros):
    * Source/JavaScriptCore/wasm/WasmBBQJIT.cpp:
    (JSC::Wasm::BBQJIT::addSIMDSplat):
    (JSC::Wasm::BBQJIT::addSIMDI_V):
    (JSC::Wasm::BBQJIT::addSIMDV_V):
    (JSC::Wasm::BBQJIT::addSIMDRelOp):
    (JSC::Wasm::BBQJIT::emitVectorMul):
    
    Canonical link: https://commits.webkit.org/260700@main

Index: webkit2gtk-2.39.91/Source/JavaScriptCore/b3/air/AirLowerMacros.cpp
===================================================================
--- webkit2gtk-2.39.91.orig/Source/JavaScriptCore/b3/air/AirLowerMacros.cpp
+++ webkit2gtk-2.39.91/Source/JavaScriptCore/b3/air/AirLowerMacros.cpp
@@ -167,8 +167,6 @@
                 Tmp rhsLower = code.newTmp(GP);
                 Tmp rhsUpper = code.newTmp(GP);
 
-                Tmp tmp = code.newTmp(FP);
-
                 insertionSet.insert(instIndex, VectorExtractLaneInt64, origin, Arg::imm(0), lhs, lhsLower);
                 insertionSet.insert(instIndex, VectorExtractLaneInt64, origin, Arg::imm(1), lhs, lhsUpper);
                 insertionSet.insert(instIndex, VectorExtractLaneInt64, origin, Arg::imm(0), rhs, rhsLower);
@@ -176,10 +174,9 @@
 
                 insertionSet.insert(instIndex, Mul64, origin, lhsLower, rhsLower);
                 insertionSet.insert(instIndex, Mul64, origin, lhsUpper, rhsUpper);
-                insertionSet.insert(instIndex, MoveZeroToVector, origin, tmp);
-                insertionSet.insert(instIndex, VectorReplaceLaneInt64, origin, Arg::imm(0), rhsLower, tmp);
-                insertionSet.insert(instIndex, VectorReplaceLaneInt64, origin, Arg::imm(1), rhsUpper, tmp);
-                insertionSet.insert(instIndex, MoveVector, origin, tmp, dst);
+                insertionSet.insert(instIndex, MoveZeroToVector, origin, dst);
+                insertionSet.insert(instIndex, VectorReplaceLaneInt64, origin, Arg::imm(0), rhsLower, dst);
+                insertionSet.insert(instIndex, VectorReplaceLaneInt64, origin, Arg::imm(1), rhsUpper, dst);
 
                 inst = Inst();
             };
Index: webkit2gtk-2.39.91/Source/JavaScriptCore/wasm/WasmBBQJIT.cpp
===================================================================
--- webkit2gtk-2.39.91.orig/Source/JavaScriptCore/wasm/WasmBBQJIT.cpp
+++ webkit2gtk-2.39.91/Source/JavaScriptCore/wasm/WasmBBQJIT.cpp
@@ -6516,7 +6516,18 @@
 
     PartialResult WARN_UNUSED_RETURN addSIMDSplat(SIMDLane lane, ExpressionType value, ExpressionType& result)
     {
-        Location valueLocation = loadIfNecessary(value);
+        Location valueLocation;
+        if (value.isConst()) {
+            if (value.isFloat()) {
+                ScratchScope<0, 1> scratches(*this);
+                valueLocation = Location::fromFPR(scratches.fpr(0));
+            } else {
+                ScratchScope<1, 0> scratches(*this);
+                valueLocation = Location::fromGPR(scratches.gpr(0));
+            }
+            emitMoveConst(value, valueLocation);
+        } else
+            valueLocation = loadIfNecessary(value);
         consume(value);
 
         result = topValue(TypeKind::V128);
@@ -7010,11 +7021,11 @@
             return { };
         case JSC::SIMDLaneOperation::AnyTrue:
 #if CPU(ARM64)
-                m_jit.vectorUnsignedMax(SIMDInfo { SIMDLane::i32x4, SIMDSignMode::None }, valueLocation.asFPR(), m_scratchFPR);
-                m_jit.moveFloatTo32(m_scratchFPR, resultLocation.asGPR());
-                m_jit.test32(ResultCondition::NonZero, resultLocation.asGPR(), resultLocation.asGPR(), resultLocation.asGPR());
+            m_jit.vectorUnsignedMax(SIMDInfo { SIMDLane::i32x4, SIMDSignMode::None }, valueLocation.asFPR(), m_scratchFPR);
+            m_jit.moveFloatTo32(m_scratchFPR, resultLocation.asGPR());
+            m_jit.test32(ResultCondition::NonZero, resultLocation.asGPR(), resultLocation.asGPR(), resultLocation.asGPR());
 #else
-                m_jit.vectorAnyTrue(valueLocation.asFPR(), resultLocation.asGPR());
+            m_jit.vectorAnyTrue(valueLocation.asFPR(), resultLocation.asGPR());
 #endif
             return { };
         case JSC::SIMDLaneOperation::AllTrue:
@@ -7191,15 +7202,16 @@
             m_jit.vectorTruncSat(info, valueLocation.asFPR(), resultLocation.asFPR());
 #endif
             return { };
-        case JSC::SIMDLaneOperation::Not:
+        case JSC::SIMDLaneOperation::Not: {
 #if CPU(X86_64)
-                // Equality is always a single instruction for this lane, so it doesn't matter that our scratch register is the same as the operands.
-                m_jit.compareIntegerVector(RelationalCondition::Equal, SIMDInfo { SIMDLane::i32x4, SIMDSignMode::None }, m_scratchFPR, m_scratchFPR, m_scratchFPR, m_scratchFPR);
-                m_jit.vectorXor(info, valueLocation.asFPR(), m_scratchFPR, resultLocation.asFPR());
+            ScratchScope<0, 1> scratches(*this, valueLocation, resultLocation);
+            m_jit.compareIntegerVector(RelationalCondition::Equal, SIMDInfo { SIMDLane::i32x4, SIMDSignMode::None }, m_scratchFPR, m_scratchFPR, m_scratchFPR, scratches.fpr(0));
+            m_jit.vectorXor(info, valueLocation.asFPR(), m_scratchFPR, resultLocation.asFPR());
 #else
-                m_jit.vectorNot(info, valueLocation.asFPR(), resultLocation.asFPR());
+            m_jit.vectorNot(info, valueLocation.asFPR(), resultLocation.asFPR());
 #endif
             return { };
+        }
         case JSC::SIMDLaneOperation::Neg:
 #if CPU(X86_64)
             switch (info.lane) {
@@ -7286,35 +7298,43 @@
         // directly implement most relational conditions between vectors: the cases below
         // are best emitted as inversions of conditions that are supported.
         switch (relOp.asRelationalCondition()) {
-        case MacroAssembler::NotEqual:
+        case MacroAssembler::NotEqual: {
+            ScratchScope<0, 1> scratches(*this, leftLocation, rightLocation, resultLocation);
             m_jit.compareIntegerVector(RelationalCondition::Equal, info, leftLocation.asFPR(), rightLocation.asFPR(), resultLocation.asFPR(), m_scratchFPR);
-            m_jit.compareIntegerVector(RelationalCondition::Equal, SIMDInfo { SIMDLane::i32x4, SIMDSignMode::None }, m_scratchFPR, m_scratchFPR, m_scratchFPR, m_scratchFPR);
+            m_jit.compareIntegerVector(RelationalCondition::Equal, SIMDInfo { SIMDLane::i32x4, SIMDSignMode::None }, m_scratchFPR, m_scratchFPR, m_scratchFPR, scratches.fpr(0));
             m_jit.vectorXor(SIMDInfo { SIMDLane::v128, SIMDSignMode::None }, resultLocation.asFPR(), m_scratchFPR, resultLocation.asFPR());
             break;
-        case MacroAssembler::Above:
+        }
+        case MacroAssembler::Above: {
+            ScratchScope<0, 1> scratches(*this, leftLocation, rightLocation, resultLocation);
             m_jit.compareIntegerVector(RelationalCondition::BelowOrEqual, info, leftLocation.asFPR(), rightLocation.asFPR(), resultLocation.asFPR(), m_scratchFPR);
-            m_jit.compareIntegerVector(RelationalCondition::Equal, SIMDInfo { SIMDLane::i32x4, SIMDSignMode::None }, m_scratchFPR, m_scratchFPR, m_scratchFPR, m_scratchFPR);
+            m_jit.compareIntegerVector(RelationalCondition::Equal, SIMDInfo { SIMDLane::i32x4, SIMDSignMode::None }, m_scratchFPR, m_scratchFPR, m_scratchFPR, scratches.fpr(0));
             m_jit.vectorXor(SIMDInfo { SIMDLane::v128, SIMDSignMode::None }, resultLocation.asFPR(), m_scratchFPR, resultLocation.asFPR());
             break;
-        case MacroAssembler::Below:
+        }
+        case MacroAssembler::Below: {
+            ScratchScope<0, 1> scratches(*this, leftLocation, rightLocation, resultLocation);
             m_jit.compareIntegerVector(RelationalCondition::AboveOrEqual, info, leftLocation.asFPR(), rightLocation.asFPR(), resultLocation.asFPR(), m_scratchFPR);
-            m_jit.compareIntegerVector(RelationalCondition::Equal, SIMDInfo { SIMDLane::i32x4, SIMDSignMode::None }, m_scratchFPR, m_scratchFPR, m_scratchFPR, m_scratchFPR);
+            m_jit.compareIntegerVector(RelationalCondition::Equal, SIMDInfo { SIMDLane::i32x4, SIMDSignMode::None }, m_scratchFPR, m_scratchFPR, m_scratchFPR, scratches.fpr(0));
             m_jit.vectorXor(SIMDInfo { SIMDLane::v128, SIMDSignMode::None }, resultLocation.asFPR(), m_scratchFPR, resultLocation.asFPR());
             break;
+        }
         case MacroAssembler::GreaterThanOrEqual:
             if (info.lane == SIMDLane::i64x2) {
                 // Note: rhs and lhs are reversed here, we are semantically negating LessThan. GreaterThan is
                 // just better supported on AVX.
+                ScratchScope<0, 1> scratches(*this, leftLocation, rightLocation, resultLocation);
                 m_jit.compareIntegerVector(RelationalCondition::GreaterThan, info, rightLocation.asFPR(), leftLocation.asFPR(), resultLocation.asFPR(), m_scratchFPR);
-                m_jit.compareIntegerVector(RelationalCondition::Equal, SIMDInfo { SIMDLane::i32x4, SIMDSignMode::None }, m_scratchFPR, m_scratchFPR, m_scratchFPR, m_scratchFPR);
+                m_jit.compareIntegerVector(RelationalCondition::Equal, SIMDInfo { SIMDLane::i32x4, SIMDSignMode::None }, m_scratchFPR, m_scratchFPR, m_scratchFPR, scratches.fpr(0));
                 m_jit.vectorXor(SIMDInfo { SIMDLane::v128, SIMDSignMode::None }, resultLocation.asFPR(), m_scratchFPR, resultLocation.asFPR());
             } else
                 m_jit.compareIntegerVector(relOp.asRelationalCondition(), info, leftLocation.asFPR(), rightLocation.asFPR(), resultLocation.asFPR(), m_scratchFPR);
             break;
         case MacroAssembler::LessThanOrEqual:
             if (info.lane == SIMDLane::i64x2) {
+                ScratchScope<0, 1> scratches(*this, leftLocation, rightLocation, resultLocation);
                 m_jit.compareIntegerVector(RelationalCondition::GreaterThan, info, leftLocation.asFPR(), rightLocation.asFPR(), resultLocation.asFPR(), m_scratchFPR);
-                m_jit.compareIntegerVector(RelationalCondition::Equal, SIMDInfo { SIMDLane::i32x4, SIMDSignMode::None }, m_scratchFPR, m_scratchFPR, m_scratchFPR, m_scratchFPR);
+                m_jit.compareIntegerVector(RelationalCondition::Equal, SIMDInfo { SIMDLane::i32x4, SIMDSignMode::None }, m_scratchFPR, m_scratchFPR, m_scratchFPR, scratches.fpr(0));
                 m_jit.vectorXor(SIMDInfo { SIMDLane::v128, SIMDSignMode::None }, resultLocation.asFPR(), m_scratchFPR, resultLocation.asFPR());
             } else
                 m_jit.compareIntegerVector(relOp.asRelationalCondition(), info, leftLocation.asFPR(), rightLocation.asFPR(), resultLocation.asFPR(), m_scratchFPR);
@@ -7336,11 +7356,12 @@
             m_jit.vectorExtractLaneInt64(TrustedImm32(0), left.asFPR(), m_scratchGPR);
             m_jit.vectorExtractLaneInt64(TrustedImm32(0), right.asFPR(), m_dataScratchGPR);
             m_jit.mul64(m_scratchGPR, m_dataScratchGPR, m_scratchGPR);
-            m_jit.vectorReplaceLane(SIMDLane::i64x2, TrustedImm32(0), m_scratchGPR, result.asFPR());
+            m_jit.vectorReplaceLane(SIMDLane::i64x2, TrustedImm32(0), m_scratchGPR, m_scratchFPR);
             m_jit.vectorExtractLaneInt64(TrustedImm32(1), left.asFPR(), m_scratchGPR);
             m_jit.vectorExtractLaneInt64(TrustedImm32(1), right.asFPR(), m_dataScratchGPR);
             m_jit.mul64(m_scratchGPR, m_dataScratchGPR, m_scratchGPR);
-            m_jit.vectorReplaceLane(SIMDLane::i64x2, TrustedImm32(1), m_scratchGPR, result.asFPR());
+            m_jit.vectorReplaceLane(SIMDLane::i64x2, TrustedImm32(1), m_scratchGPR, m_scratchFPR);
+            m_jit.moveVector(m_scratchFPR, result.asFPR());
         } else
             m_jit.vectorMul(info, left.asFPR(), right.asFPR(), result.asFPR());
     }
